{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[this doc on github](https://github.com/dotnet/interactive/tree/main/samples/notebooks/csharp/Samples)\n",
    "\n",
    "# Machine Learning over House Prices with ML.NET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json\" \n",
    "#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json\" \n",
    "\n",
    "#r \"nuget:Microsoft.ML, 1.7.0\"\n",
    "#r \"nuget:Microsoft.ML.AutoML, 0.19.0\"\n",
    "#r \"nuget:Microsoft.Data.Analysis, 0.19.0\"\n",
    "#r \"nuget:XPlot.Plotly.Interactive, 4.0.6\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\n",
    "using Microsoft.DotNet.Interactive.Formatting;\n",
    "using Microsoft.Data.Analysis;\n",
    "using XPlot.Plotly;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "using System.IO;\n",
    "using System.Net.Http;\n",
    "string housingPath = \"housing.csv\";\n",
    "\n",
    "if (!File.Exists(housingPath))\n",
    "{\n",
    "    var contents = await new HttpClient()\n",
    "        .GetStringAsync(\"https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv\");\n",
    "        \n",
    "    File.WriteAllText(housingPath, contents);\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "var housingData = DataFrame.LoadCsv(housingPath);\n",
    "housingData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "housingData.Description()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "Chart.Plot(\n",
    "    new Histogram()\n",
    "    {\n",
    "        x = housingData.Columns[\"median_house_value\"],\n",
    "        nbinsx = 20\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "var chart = Chart.Plot(\n",
    "    new Scattergl()\n",
    "    {\n",
    "        x = housingData.Columns[\"longitude\"],\n",
    "        y = housingData.Columns[\"latitude\"],\n",
    "        mode = \"markers\",\n",
    "        marker = new Marker()\n",
    "        {\n",
    "            color = housingData.Columns[\"median_house_value\"],\n",
    "            colorscale = \"Jet\"\n",
    "        }\n",
    "    }\n",
    ");\n",
    "\n",
    "chart.Width = 600;\n",
    "chart.Height = 600;\n",
    "chart.Display();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "static T[] Shuffle<T>(T[] array)\n",
    "{\n",
    "    Random rand = new Random();\n",
    "    for (int i = 0; i < array.Length; i++)\n",
    "    {\n",
    "        int r = i + rand.Next(array.Length - i);\n",
    "        T temp = array[r];\n",
    "        array[r] = array[i];\n",
    "        array[i] = temp;\n",
    "    }\n",
    "    return array;\n",
    "}\n",
    "\n",
    "int[] randomIndices = Shuffle(Enumerable.Range(0, (int)housingData.Rows.Count).ToArray());\n",
    "int testSize = (int)(housingData.Rows.Count * .1);\n",
    "int[] trainRows = randomIndices[testSize..];\n",
    "int[] testRows = randomIndices[..testSize];\n",
    "\n",
    "DataFrame housing_train = housingData[trainRows];\n",
    "DataFrame housing_test = housingData[testRows];\n",
    "\n",
    "housing_train.Rows.Count.Display();\n",
    "housing_test.Rows.Count.Display();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "using Microsoft.ML;\n",
    "using Microsoft.ML.Data;\n",
    "using Microsoft.ML.AutoML;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "#!time\n",
    "\n",
    "var mlContext = new MLContext();\n",
    "\n",
    "var experiment = mlContext.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds: 15);\n",
    "var result = experiment.Execute(housing_train, labelColumnName:\"median_house_value\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "var scatters = result.RunDetails.Where(d => d.ValidationMetrics != null).GroupBy(\n",
    "    r => r.TrainerName,\n",
    "    (name, details) => new Scattergl()\n",
    "    {\n",
    "        name = name,\n",
    "        x = details.Select(r => r.RuntimeInSeconds),\n",
    "        y = details.Select(r => r.ValidationMetrics.MeanAbsoluteError),\n",
    "        mode = \"markers\",\n",
    "        marker = new Marker() { size = 12 }\n",
    "    });\n",
    "\n",
    "var chart = Chart.Plot(scatters);\n",
    "chart.WithXTitle(\"Training Time\");\n",
    "chart.WithYTitle(\"Error\");\n",
    "chart.Display();\n",
    "\n",
    "Console.WriteLine($\"Best Trainer:{result.BestRun.TrainerName}\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "dotnet_interactive": {
     "language": "csharp"
    }
   },
   "outputs": [],
   "source": [
    "var testResults = result.BestRun.Model.Transform(housing_test);\n",
    "\n",
    "var trueValues = testResults.GetColumn<float>(\"median_house_value\");\n",
    "var predictedValues = testResults.GetColumn<float>(\"Score\");\n",
    "\n",
    "var predictedVsTrue = new Scattergl()\n",
    "{\n",
    "    x = trueValues,\n",
    "    y = predictedValues,\n",
    "    mode = \"markers\",\n",
    "};\n",
    "\n",
    "var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());\n",
    "\n",
    "var perfectLine = new Scattergl()\n",
    "{\n",
    "    x = new[] {0, maximumValue},\n",
    "    y = new[] {0, maximumValue},\n",
    "    mode = \"lines\",\n",
    "};\n",
    "\n",
    "var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });\n",
    "chart.WithXTitle(\"True Values\");\n",
    "chart.WithYTitle(\"Predicted Values\");\n",
    "chart.WithLegend(false);\n",
    "chart.Width = 600;\n",
    "chart.Height = 600;\n",
    "chart.Display();"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".NET (C#)",
   "language": "C#",
   "name": ".net-csharp"
  },
  "language_info": {
   "file_extension": ".cs",
   "mimetype": "text/x-csharp",
   "name": "C#",
   "pygments_lexer": "csharp",
   "version": "8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
